# -*- coding: utf-8 -*-
import scrapy
from myproject.items import MyprojectItem
import requests

# Default HTTP headers: present the client as a desktop Chrome browser.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/56.0.2924.87 Safari/537.36'
    ),
}


class QidianSpider(scrapy.Spider):
    """Spider that scrapes the paginated book listing of qidian.com.

    Every listing page URL is generated up front and stored in
    ``start_urls``.  ``parse`` then turns each ``<li>`` entry of a listing
    page into one ``MyprojectItem``.
    """

    name = 'qidian'
    allowed_domains = ['qidian.com']
    start_urls = ['https://www.qidian.com/all']

    # Follow-up pages are addressed through the trailing ``page`` query
    # parameter.  (Scrapy's Rules could be used to follow "next page" links
    # instead of enumerating the URLs here.)
    re_url = 'https://www.qidian.com/all?orderId=&style=1&pageSize=20&siteid=1&hiddenField=0&page='
    # Highest listing page number that is queued.
    max_page = 9999

    # A plain loop is used on purpose: a comprehension or generator expression
    # in a class body cannot see ``re_url`` on Python 3.
    for page in range(2, max_page + 1):
        start_urls.append(re_url + str(page))
    # Do not leave the loop variable behind as a class attribute.
    del page

    @staticmethod
    def _text(node, xpath):
        """Return the extracted string values of the nodes matching *xpath*.

        *xpath* is evaluated relative to *node*; each match is reduced to its
        ``string(.)`` value and the results are returned as a list.
        """
        return node.xpath(xpath).xpath('string(.)').extract()

    def parse(self, response):
        """Yield one ``MyprojectItem`` per book entry on a listing page.

        Every item field holds the list returned by ``extract()``.  An entry
        whose extraction raises is logged with its traceback and skipped, so
        one bad entry does not abort the rest of the page.
        """
        for content in response.xpath("//ul[@class='all-img-list cf']/li"):
            try:
                item = MyprojectItem()
                item['title'] = self._text(content, ".//h4")
                item['url'] = content.xpath(".//h4/a/@href").extract()
                item['author'] = self._text(content, ".//p[@class = 'author']/a[1]")
                item['sort'] = self._text(content, ".//p[@class = 'author']/a[2]")
                item['status'] = self._text(content, ".//p[@class = 'author']/span")
                item['intro'] = self._text(content, ".//p[@class = 'intro']")
                item['img_book'] = content.xpath(".//a/img/@src").extract()
            except Exception:
                # Best effort: keep crawling, but record what went wrong
                # instead of discarding the exception.
                self.logger.exception('parse wrong！！！')
                continue
            # Yield outside the ``try`` so only extraction errors are caught.
            yield item